PS4

Problem 1

Create a class to represent Wald-style normal approximation Confidence Intervals. Do this using S4.

# rm(list=ls())

# create class
setClass("waldCI",
         slots = c(level = "numeric",
                   mean = "numeric",
                   sterr = "numeric",
                   lb = "numeric",
                   ub = "numeric"))
  1. For the waldCI class, define the following:
  1. A constructor, which takes in a confidence level (0,1) and either a mean and standard error, or a lower and upper bound. This should be a custom constructor, not new() or waldCI().
  2. A validator.

For this section, I created a validator first so that I can use it in the constructor.

# create validator for use in constructor
setValidity("waldCI", function(object) {
  
  # needed for part c
  
  # negative standard error
  if (object@sterr <= 0) {
    stop(paste("@sterr = ", object@sterr, " cannot be negative"))
  }
  
  # lb > ub
  if (object@lb >= object@ub) {
    stop(paste("@lb = ", object@lb, " cannot be larger than @ub = ", object@ub))
  }
  
  # infinite bounds
  if (!is.finite(object@lb) || !is.finite(object@ub)) {
    stop(paste("Bounds must be finite"))
  }
  
  return(TRUE)
})
Class "waldCI" [in ".GlobalEnv"]

Slots:
                                              
Name:    level    mean   sterr      lb      ub
Class: numeric numeric numeric numeric numeric
# constructor
waldCI <- function(level, mean = NULL, sterr = NULL, lb = NULL, ub = NULL) {
  
  # check level
  if (missing(level) || level <= 0 || level >= 1) {
    stop(paste("Confidence level must be a value between (0,1)"))
  }
  
  # fill in empty slots
  
  alpha <- 1 - level
  z <- qnorm(1 - alpha / 2)
  
  if (!is.null(mean) && !is.null(sterr)) {
    lb <- mean - z * sterr
    ub <- mean + z * sterr
  } else if (!is.null(lb) && !is.null(ub)) {
    mean <- (ub + lb) / 2
    sterr <- (ub - lb) / (2 * z)
  } else {
    stop("You must provide either (mean and se) or (lower and upper).")
  }
  
  object <- new("waldCI", level = level, mean = mean, sterr = sterr, lb = lb, ub = ub)
  validObject(object)
  return(object)
}
  1. A show method.
setGeneric("show",
           function(object, ...) {
             standardGeneric("show")
           })
Creating a new generic function for 'show' in the global environment
[1] "show"
setMethod("show", "waldCI",
  function(object, ...) {
    cat("Wald Confidence Interval\n")
    cat("  Confidence level: ", object@level, "\n")
    cat("  Mean: ", object@mean, "\n")
    cat("  Standard error: ", object@sterr, "\n")
    cat("  CI: [", object@lb, ", ", object@ub, "]\n")
    return(invisible(object))
  }
)
  1. Accessors: lb, ub, mean, sterr, level.
setGeneric("lb", function(object, ...) standardGeneric("lb"))
[1] "lb"
setMethod("lb", "waldCI", function(object, ...) object@lb)

setGeneric("ub", function(object, ...) standardGeneric("ub"))
[1] "ub"
setMethod("ub", "waldCI", function(object, ...) object@ub)

setGeneric("mean", function(object, ...) standardGeneric("mean"))
Creating a new generic function for 'mean' in the global environment
[1] "mean"
setMethod("mean", "waldCI", function(object, ...) object@mean)

setGeneric("sterr", function(object, ...) standardGeneric("sterr"))
[1] "sterr"
setMethod("sterr", "waldCI", function(object, ...) object@sterr)

setGeneric("level", function(object, ...) standardGeneric("level"))
[1] "level"
setMethod("level", "waldCI", function(object, ...) object@level)
  1. Setters: lb, ub, mean, sterr, level. Be sure to validate the resulting waldCI.
setGeneric("lb<-",
           function(object, value, ...) {
             standardGeneric("lb<-")
           })
[1] "lb<-"
setMethod("lb<-", "waldCI",
  function(object, value, ...) {
    object@lb <- value
    validObject(object)
    return(object)
  }
)

setGeneric("ub<-",
           function(object, value, ...) {
             standardGeneric("ub<-")
           })
[1] "ub<-"
setMethod("ub<-", "waldCI",
  function(object, value, ...) {
    object@ub <- value
    validObject(object)
    return(object)
  }
)

setGeneric("mean<-",
           function(object, value, ...) {
             standardGeneric("mean<-")
           })
[1] "mean<-"
setMethod("mean<-", "waldCI",
  function(object, value, ...) {
    object@mean <- value
    validObject(object)
    return(object)
  }
)

setGeneric("sterr<-",
           function(object, value, ...) {
             standardGeneric("sterr<-")
           })
[1] "sterr<-"
setMethod("sterr<-", "waldCI",
  function(object, value, ...) {
    object@sterr <- value
    validObject(object)
    return(object)
  }
)

setGeneric("level<-",
           function(object, value, ...) {
             standardGeneric("level<-")
           })
[1] "level<-"
setMethod("level<-", "waldCI",
  function(object, value, ...) {
    object@level <- value
    validObject(object)
    return(object)
  }
)
  1. A contains method, returning a logical of whether a value is within a CI.
setGeneric("contains", function(object, value, ...) standardGeneric("contains"))
[1] "contains"
setMethod("contains", "waldCI",
          function(object, value, ...) {
            return(value >= object@lb && value <= object@ub)
          })
  1. An overlap method, that takes in two waldCI’s, and returns a logical of whether the two confidence intervals overlap.
setGeneric("overlap", function(ci1, ci2, ...) standardGeneric("overlap"))
[1] "overlap"
setMethod("overlap", signature(ci1 = "waldCI", ci2 = "waldCI"),
          function(ci1, ci2, ...) {
            return(max(ci1@lb, ci2@lb) <= min(ci1@ub, ci2@ub))
          })
  1. as.numeric to return c(lb, ub). (Hint: The second argument of setGeneric is not needed when an existing s3 function uses the .Primitive function.)
setMethod("as.numeric", "waldCI", function(x, ...) {
  c(x@lb, x@ub)
})
  1. transformCI which takes in a function and a waldCI, and returns the transformed waldCI object. Warn the user that only monotonic functions make sense.
setGeneric("transformCI", function(object, foo, ...) standardGeneric("transformCI"))
[1] "transformCI"
setMethod("transformCI", signature(object = "waldCI", foo = "function"),
          function(object, foo, ...) {
            warning("Warning: Only monotonic functions make sense for transformations")
            
            new_lb <- foo(object@lb)
            new_ub <- foo(object@ub)
            
            waldCI(level = object@level,
                   lb = new_lb,
                   ub = new_ub)
            })
  1. Use your waldCI class to create three objects:
  • ci1: (17.2, 24.7), 95%

  • ci2: mean: 13, standard error: 2.5, 99%

  • ci3: (27.43, 39.22), 75%

ci1 <- waldCI(level = 0.95, lb = 17.2, ub = 24.7)
show(ci1)
Wald Confidence Interval
  Confidence level:  0.95 
  Mean:  20.95 
  Standard error:  1.9133 
  CI: [ 17.2 ,  24.7 ]
ci2 <- waldCI(level = 0.99, mean = 13, sterr = 2.5)
show(ci2)
Wald Confidence Interval
  Confidence level:  0.99 
  Mean:  13 
  Standard error:  2.5 
  CI: [ 6.560427 ,  19.43957 ]
ci3 <- waldCI(level = 0.75, lb = 27.43, ub = 39.22)
show(ci3)
Wald Confidence Interval
  Confidence level:  0.75 
  Mean:  33.325 
  Standard error:  5.12453 
  CI: [ 27.43 ,  39.22 ]

Evaluate the following code:

ci1
An object of class "waldCI"
Slot "level":
[1] 0.95

Slot "mean":
[1] 20.95

Slot "sterr":
[1] 1.9133

Slot "lb":
[1] 17.2

Slot "ub":
[1] 24.7
ci2
An object of class "waldCI"
Slot "level":
[1] 0.99

Slot "mean":
[1] 13

Slot "sterr":
[1] 2.5

Slot "lb":
[1] 6.560427

Slot "ub":
[1] 19.43957
ci3
An object of class "waldCI"
Slot "level":
[1] 0.75

Slot "mean":
[1] 33.325

Slot "sterr":
[1] 5.12453

Slot "lb":
[1] 27.43

Slot "ub":
[1] 39.22
as.numeric(ci1)
[1] 17.2 24.7
as.numeric(ci2)
[1]  6.560427 19.439573
as.numeric(ci3)
[1] 27.43 39.22
lb(ci2)
[1] 6.560427
ub(ci2)
[1] 19.43957
mean(ci1)
[1] 20.95
sterr(ci3)
[1] 5.12453
level(ci2)
[1] 0.99
lb(ci2) <- 10.5
mean(ci3) <- 34
level(ci3) <- .8
contains(ci1, 17)
[1] FALSE
contains(ci3, 44)
[1] FALSE
overlap(ci1, ci2)
[1] TRUE
eci1 <- transformCI(ci1, sqrt)
Warning in transformCI(ci1, sqrt): Warning: Only monotonic functions make sense
for transformations
eci1
An object of class "waldCI"
Slot "level":
[1] 0.95

Slot "mean":
[1] 4.558599

Slot "sterr":
[1] 0.2098562

Slot "lb":
[1] 4.147288

Slot "ub":
[1] 4.969909
mean(transformCI(ci2, log))
Warning in transformCI(ci2, log): Warning: Only monotonic functions make sense
for transformations
[1] 2.659343
  1. Show that your validator does not allow the creation of invalid confidence intervals:
  • negative standard error

  • lb > ub

  • Infinite bounds

  • invalid use of the setters

# negative sterr
try({waldCI(level = 0.95, mean = 10, sterr = -2)})
Error in validityMethod(object) : @sterr =  -2  cannot be negative
# lb > ub
try({waldCI(level = 0.95, lb = 10, ub = 5)})
Error in validityMethod(object) : 
  @sterr =  -1.27553364231164  cannot be negative
# infinite bounds
try({waldCI(level = 0.95, lb = -Inf, ub = Inf)})
Error in validityMethod(object) : Bounds must be finite
# invalid use of setters
try({
  ci <- waldCI(level = 0.95, mean = 10, sterr = 2)
  lb(ci) <- 15   # This should trigger validation failure if ub < lb now
  ub(ci) <- 5
  validObject(ci)
})
Error in validityMethod(object) : 
  @lb =  15  cannot be larger than @ub =  13.9199279690801

Problem 3

First, I load the ggplot2 library and the data. Currently, the “date” variable is a character variable, so I change the it to be of type Date.

# install.packages("plotly")
library(plotly)
Warning: package 'plotly' was built under R version 4.4.3
Loading required package: ggplot2
Warning: package 'ggplot2' was built under R version 4.4.3

Attaching package: 'plotly'
The following object is masked from 'package:ggplot2':

    last_plot
The following object is masked from 'package:stats':

    filter
The following object is masked from 'package:graphics':

    layout
library(dplyr)

Attaching package: 'dplyr'
The following object is masked _by_ '.GlobalEnv':

    contains
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
library(lubridate)

Attaching package: 'lubridate'
The following objects are masked from 'package:base':

    date, intersect, setdiff, union
us_states <- read.csv("data/us-states.csv")
us_states$date <- as.Date(us_states$date)
  1. How many major and minor spikes in cases were there?
us_states_date <- us_states %>%
  group_by(date) %>%
  summarise(total_cases = sum(cases))
  
us_states_week <- us_states %>%
  mutate(week = floor_date(date, "week")) %>%
  group_by(week) %>%
  summarise(cases_week_avg = sum(cases) / 7)

p <- plot_ly(us_states_date, 
             x = ~date, y = ~total_cases,
             type = "scatter", mode = "markers",
             marker = list(color = "slategray", opacity = 0.3, size = 4),
             name = "Daily Cases") %>%
  add_trace(
    data = us_states_week,
    x = ~week, y = ~cases_week_avg,
    type = "scatter", mode = "lines",
    line = list(width = 3),
    name = "Weekly Average"
  ) %>%
  layout(
    title = "Major and Minor COVID-19 Case Spikes in the U.S.",
    xaxis = list(title = "Date"),
    yaxis = list(title = "Total Daily New Cases")
  )

p 
A marker object has been specified, but markers is not in the mode
Adding markers to the mode...
# TODO: create color segments?
  1. For the states with the highest and lowest overall rates per population, what differences do you see in their trajectories over time?
spectral <- grDevices::hcl.colors(length(unique(us_states$state)), "Spectral")

p <- plot_ly(us_states, 
             x = ~date, y = ~cases_avg_per_100k,
             color = ~state, colors = spectral,
             type = "scatter", mode = "lines",
             line = list(width = 1.5), opacity = 0.5,
             showlegend = FALSE,
             text = ~paste(
               "State:", state,
               "<br>Date:", date,
               "<br>Cases per 100k:", round(cases_avg_per_100k, 2)),
             hoverinfo = "text") %>%
  layout(
    title = list(
      text = "COVID-19 Case Rates by State"
    ),
    xaxis = list(title = "Date"),
    yaxis = list(title = "Average Cases Per 100k Residents")
  )
p 
  1. Identify, to the best of your ability without a formal test, the first five states to experience Covid in a substantial way.
us_states_start <- us_states %>%
  filter(date > as.Date("2020-03-20"), 
         date < as.Date("2020-04-20"),
         cases_avg_per_100k > 15)

spectral <- grDevices::hcl.colors(length(unique(us_states_start$state)), "Spectral")

p <- plot_ly(us_states_start, 
             x = ~date, y = ~cases_avg_per_100k,
             color = ~state, colors = spectral,
             type = "scatter", mode = "lines",
             line = list(width = 2),
             text = ~paste(
               "State:", state,
               "<br>Date:", date,
               "<br>Cases per 100k:", round(cases_avg_per_100k, 2)),
             hoverinfo = "text") %>%
  layout(
    title = list(
      text = "COVID-19 Case Rates by State (Mar–Apr 2020)"
    ),
    xaxis = list(title = "Date"),
    yaxis = list(title = "Average Cases Per 100k Residents"),
    legend = list(title = list(text = "State"))
  )
p 

Attribution